#-------------------------------------------------------------------------------
# Replication code for survey 2

# Packages
library(tidyverse); library(rio); library(cowplot); library(texreg)

# Set directory or change path to saved data
# Lines that save figures are commented out

# Data
# Respondents who did not consent already removed
tu <- import("survey2.csv")

#-------------------------------------------------------------------------------
# Initial cleaning

# Variable names to lower
colnames(tu) <- tolower(colnames(tu))

# Create and recode variables
tu <- tu %>%
  mutate(minutes = as.numeric(`duration (in seconds)`)/60, 
         pass_manip = ifelse(str_detect(abmanipluation, "Rising grocery"), 1, 0),
         believe = ifelse(str_detect(abbelievable, "unbeliev"), 0, 1)) %>%
  mutate(across(abeligible:abdoc.2, ~ifelse(. == "", NA, .))) %>%
  
  # Treatment manipulation
  mutate(manipulation = case_when(!is.na(`timerbaseline_first click`)  ~ "Baseline",
                                  !is.na(`timershort_first click`)  ~ "30 Minute Average",
                                  !is.na(`timerlong_first click`)  ~ "2 Hour Average",
                                  T ~ NA_character_),
         # DVs
         # Believe eligible
         eligible_char = coalesce(abeligible, abeligible.1, abeligible.2),
         eligible = ifelse(eligible_char == "Yes", 1, 0),
         # Likely apply
         apply_char = coalesce(abapply_1, abapply_1.1, abapply_1.2),
         apply = case_when(apply_char == "Strongly disagree" ~ 1, 
                           apply_char == "Disagree" ~ 2, 
                           apply_char == "Somewhat disagree" ~ 3, 
                           apply_char == "Neither agree nor disagree" ~ 4,
                           apply_char == "Somewhat agree" ~ 5, 
                           apply_char == "Agree" ~ 6,
                           apply_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Helpful for people like me
         helpful_char = coalesce(abapply_2, abapply_2.1, abapply_2.2),
         helpful = case_when(helpful_char == "Strongly disagree" ~ 1, 
                             helpful_char == "Disagree" ~ 2, 
                             helpful_char == "Somewhat disagree" ~ 3, 
                             helpful_char == "Neither agree nor disagree" ~ 4,
                             helpful_char == "Somewhat agree" ~ 5, 
                             helpful_char == "Agree" ~ 6,
                             helpful_char == "Strongly agree" ~ 7,
                             T ~ NA_real_),
         # Collecting information worth my time
         worth_char = coalesce(abapply_3, abapply_3.1, abapply_3.2),
         worth = case_when(worth_char == "Strongly disagree" ~ 1, 
                           worth_char == "Disagree" ~ 2, 
                           worth_char == "Somewhat disagree" ~ 3, 
                           worth_char == "Neither agree nor disagree" ~ 4,
                           worth_char == "Somewhat agree" ~ 5, 
                           worth_char == "Agree" ~ 6,
                           worth_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Learning more about program
         learn_char = coalesce(abapply_4, abapply_4.1, abapply_4.2),
         learn = case_when(learn_char == "Strongly disagree" ~ 1, 
                           learn_char == "Disagree" ~ 2, 
                           learn_char == "Somewhat disagree" ~ 3, 
                           learn_char == "Neither agree nor disagree" ~ 4,
                           learn_char == "Somewhat agree" ~ 5, 
                           learn_char == "Agree" ~ 6,
                           learn_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Dichotomize
         apply_di = ifelse(apply >= 5, 1, 0),
         helpful_di = ifelse(helpful >= 5, 1, 0),
         worth_di = ifelse(worth >= 5, 1, 0),
         learn_di = ifelse(learn >= 5, 1, 0),
         # Enough support
         enough_char = coalesce(abmoney, abmoney.1, abmoney.2), 
         not_enough = ifelse(str_detect(enough_char, "too little"), 1, 0),
         # Documentation
         documentation = coalesce(abdoc, abdoc.1, abdoc.2),
         # Time estimation
         est_time = abesttime_1_text,
         no_est_time = abesttime,
         # Race
         race = case_when(ethnicity == 1 ~ "White",
                          ethnicity == 2 ~ "Black",
                          ethnicity == 3 ~ "American Indian",
                          ethnicity %in% seq(4, 14, 1) ~ "Asian/Pacific Islander",
                          ethnicity == 15 ~ "Other",
                          T ~ "Other"),
         race = ifelse(hispanic == 1, race, "Hispanic"),
         female = ifelse(gender == 2, "Female", "Male"),
         education = ifelse(education < 1, NA, education),
         hhi = ifelse(hhi == 25, NA, hhi)
         )

#-------------------------------------------------------------------------------
# Summary information 

# Timing
summary(tu$minutes)
median(tu$minutes)
table(tu$minutes>30)[2]
table(tu$minutes>15)[2]
table(tu$minutes<3)[2]

# Indicator for fewer than 3 minutes
tu <- tu %>%
  mutate(exclude_time = ifelse(minutes < 3, 1, 0))

# N per treatment
table(tu$manipulation)

# Manipulation check
table(tu$manipulation, tu$pass_manip)
sum(table(tu$manipulation, tu$pass_manip)[,2])/sum(table(tu$manipulation, tu$pass_manip))

# Believability
table(tu$believe)
prop.table(table(tu$manipulation, tu$believe), 1)

#------------------
# Means for each DV
tu %>%
  select(eligible, apply, helpful, worth, learn, not_enough) %>%
  summary()
tu %>%
  select(eligible, apply, helpful, worth, learn, not_enough) %>%
  summarize(across(everything(), ~ sd(.x, na.rm = TRUE)))

#-------------------------------------------------------------------------------
# Distributions

# Reshape to long format
long_tu <- tu %>%
  select(apply, helpful, worth, learn) %>%
  pivot_longer(cols = everything(), names_to = "question", values_to = "response") %>%
  filter(!is.na(response))

# Add labels
long_tu <- long_tu %>%
  mutate(
    question = recode(question,
                      apply = "Apply",
                      helpful = "Helpful for Me",
                      worth = "Worth Time",
                      learn = "Learn More"),
    response = factor(response, levels = 1:7,
                      labels = c("Strongly Disagree", "Disagree", "Somewhat Disagree",
                                 "Neither", "Somewhat Agree", "Agree", "Strongly Agree"))
  )

# Create the plot
ggplot(long_tu, aes(x = response)) +
  geom_bar(position = "dodge") +
  facet_wrap(~ question, ncol = 1) +
  labs(x = "", y = "Number of Respondents") +
  theme_bw(base_size = 12) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1))
# ggsave("distribution_s2.pdf", width = 10, height = 8)

#-------------------------------------------------------------------------------
# Difference of means

# apply, helpful, worth, learn

# Function to grab the difference of means between two variables
dif.of.means.fun = function(var1, var2){
  temp <- t.test(var1, var2)
  p <- temp$p.value
  mean1 <- temp$estimate[1]
  mean2 <- temp$estimate[2]
  lower <- temp$conf.int[1]
  upper <- temp$conf.int[2]
  temp2 <- t.test(var1, var2,conf.level = 0.9)
  lower90 <- temp2$conf.int[1]
  upper90 <- temp2$conf.int[2]
  difference <- mean1 - mean2 
  out <- data.frame("mean1" = mean1,
                   "mean2"= mean2,
                   "difference" = difference,
                   "lower95" = lower,
                   "upper95" = upper,
                   "lower90" = lower90,
                   "upper90" = upper90,
                   "p" = p)
  return(out)
}

# Sanity check
dif.of.means.fun(tu$apply[tu$manipulation == "30 Minute Average"],
                 tu$apply[tu$manipulation == "Baseline"])
dif.of.means.fun(tu$apply[tu$manipulation == "2 Hour Average"],
                 tu$apply[tu$manipulation == "Baseline"])

# All treatments and DVs
dif.of.means.df <- bind_rows(# apply
                             dif.of.means.fun(tu$apply[tu$manipulation == "30 Minute Average"],
                                              tu$apply[tu$manipulation == "Baseline"]),
                             dif.of.means.fun(tu$apply[tu$manipulation == "2 Hour Average"],
                                              tu$apply[tu$manipulation == "Baseline"]),
                             # helpful
                             dif.of.means.fun(tu$helpful[tu$manipulation == "30 Minute Average"],
                                              tu$helpful[tu$manipulation == "Baseline"]),
                             dif.of.means.fun(tu$helpful[tu$manipulation == "2 Hour Average"],
                                              tu$helpful[tu$manipulation == "Baseline"]),
                             # worth
                             dif.of.means.fun(tu$worth[tu$manipulation == "30 Minute Average"],
                                              tu$worth[tu$manipulation == "Baseline"]),
                             dif.of.means.fun(tu$worth[tu$manipulation == "2 Hour Average"],
                                              tu$worth[tu$manipulation == "Baseline"]),
                             # learn
                             dif.of.means.fun(tu$learn[tu$manipulation == "30 Minute Average"],
                                              tu$learn[tu$manipulation == "Baseline"]),
                             dif.of.means.fun(tu$learn[tu$manipulation == "2 Hour Average"],
                                              tu$learn[tu$manipulation == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 2), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:2, 
                                  labels = c("30 Minute Average - \nBaseline",
                                             "2 Hour Average - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                          aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width = 0, linewidth = 1.5,color="black",
                aes(ymin = lower90, ymax = upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

#-------------------------------------------------------------------------------
# Difference of proportions

# eligible, not_enough

# Function to grab difference of proportions
prop.test.values <- function(generated.table, rounding.number = 3){
  n <- sum(generated.table)
  temp <- prop.test(generated.table)
  prop.1 <- temp$estimate[1]
  prop.2 <- temp$estimate[2]
  dif <- prop.1 - prop.2
  lower <- temp$conf.int[1]
  upper <- temp$conf.int[2]
  p <- temp$p.value
  temp2 <- prop.test(generated.table,conf.level = 0.9)
  lower90 <- temp2$conf.int[1]
  upper90 <- temp2$conf.int[2]
  out <- data.frame("n" = n,
                   "prop1" = prop.1,
                   "prop2" = prop.2,
                   "difference" = dif,
                   "lower95" = lower,
                   "upper95" = upper,
                   "lower90" = lower90,
                   "upper90" = upper90,
                   "p" = p)
  return(out)
}

# Sanity check
prop.test.values(table(tu$manipulation, tu$eligible)[c(1,2), ])

# Save as table
tab.elig <- table(tu$manipulation, tu$eligible)
tab.support <- table(tu$manipulation, tu$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                       "2 Hour Average - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 2), rep("Not Enough Support", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
       aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave("figure5.png", width = 12, height = 7, dpi = 300)

#-------------------------------------------------------------------------------
# Time estimation

# Calculate the median and mean for each group
stats <- tu %>%
  filter(est_time < 11) %>%
  group_by(manipulation) %>%
  summarize(
    median_est_time = median(est_time, na.rm = TRUE),
    mean_est_time = mean(est_time, na.rm = TRUE),
    sd_est_time = sd(est_time, na.rm   = TRUE)
  )

# Plot the histogram with a vertical line for the median and text labels for both median and mean
ggplot(filter(tu, est_time < 11), aes(est_time)) +
  geom_histogram(binwidth = 1, alpha = 0.5) +
  facet_wrap(~fct_rev(manipulation)) +
  geom_vline(data = stats, aes(xintercept = median_est_time), color = "black", linetype = "dashed", linewidth = 1) +
  geom_text(
    data = stats, 
    aes(x = median_est_time, y = Inf, label = paste("Median:", round(median_est_time, 1))), 
    vjust = 5, hjust = -.5, color = "black", size = 4
  ) +
  geom_text(
    data = stats, 
    aes(x = median_est_time, y = Inf, label = paste("Mean:", round(mean_est_time, 1))), 
    vjust = 7, hjust = -.5, color = "darkblue", size = 4
  ) +
  theme_bw(base_size = 14) +
  labs(y = "", x = "Estimated Time")
# ggsave("figure6.png", width = 9, height = 4, dpi = 300)

#-------------------------------------------------------------------------------
# Documentation 

# Documentation variables
tu.doc <- tu %>%
  mutate(equal = ifelse(documentation == "About equal", 1, 0),
         notsure = ifelse(documentation == "I'm not sure", 1, 0),
         identity = ifelse(documentation == "Proof of identity", 1, 0),
         income = ifelse(documentation == "Proof of income", 1, 0),
         benefits = ifelse(documentation == "Proof of other benefits", 1, 0),
         residency = ifelse(documentation == "Proof of residency", 1, 0),
         household = ifelse(documentation == "Proof of household resources", 1, 0))
tu.doc.1 <- tu.doc %>% filter(manipulation %in% c("Baseline", "30 Minute Average"))
tu.doc.2 <- tu.doc %>% filter(manipulation %in% c("Baseline", "2 Hour Average"))

# Difference of proportion
# For each treatment group and outcome
diff.prop.doc.1 <- bind_rows(prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$equal)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$notsure)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$identity)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$income)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$benefits)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$residency)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$household)))
diff.prop.doc.2 <- bind_rows(prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$equal)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$notsure)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$identity)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$income)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$benefits)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$residency)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$household)))

# Add in comparison text
diff.prop.doc.1$compare <- "30 Minute Average - \nBaseline"
diff.prop.doc.2$compare <- "2 Hour Average - \nBaseline"
diff.prop.doc.1$condition <- c("About equal", "I'm not sure", "Proof of identity", 
                           "Proof of income", "Proof of other benefits", 
                           "Proof of residency", "Proof of household resources")
diff.prop.doc.2$condition <- c("About equal", "I'm not sure", "Proof of identity", 
                             "Proof of income", "Proof of other benefits", 
                             "Proof of residency", "Proof of household resources")

# Combine and graph
diff.prop.documentation <- rbind(diff.prop.doc.1,
                                 diff.prop.doc.2)
# Change factor levels
diff.prop.documentation$compare <- fct_relevel(diff.prop.documentation$compare,
                                               "30 Minute Average - \nBaseline",
                                               "2 Hour Average - \nBaseline")
diff.prop.documentation$condition <- fct_relevel(diff.prop.documentation$condition,
                                                "I'm not sure", "About equal",
                                                "Proof of residency", "Proof of identity",
                                                "Proof of income", "Proof of other benefits", 
                                                "Proof of household resources")

ggplot(diff.prop.documentation, aes(x = fct_rev(condition), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~compare) +
  theme_bw(base_size = 14) +
  coord_flip() +
  labs(y = "Difference in Proportion", x = "")
# ggsave("documentation_diff_s2.pdf", width = 10, height = 5)

#-------------------------------------------------------------------------------
# Robustness checks
#-------------------------------------------------------------------------------

#---------------------------------
# Dichotomous DVs 
# apply, helpful, worth, learn

# Sanity check
prop.test.values(table(tu$manipulation, tu$apply_di)[c(1,2), ])

# Save as table
tab.apply <- table(tu$manipulation, tu$apply_di)
tab.helpful <- table(tu$manipulation, tu$helpful_di)
tab.worth <- table(tu$manipulation, tu$worth_di)
tab.learn <- table(tu$manipulation, tu$learn_di)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.apply[c(1, 2),]),
                       prop.test.values(tab.apply[c(1, 3),]),
                       prop.test.values(tab.helpful[c(1, 2),]),
                       prop.test.values(tab.helpful[c(1, 3),]),
                       prop.test.values(tab.worth[c(1, 2),]),
                       prop.test.values(tab.worth[c(1, 3),]),
                       prop.test.values(tab.learn[c(1, 2),]),
                       prop.test.values(tab.learn[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                             "2 Hour Average - \nBaseline"), 4))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Apply", 2), rep("Helpful for Me", 2), 
                         rep("Worth Time", 2), rep("Learn More", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition, ncol = 2) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph
# ggsave("di_dv_s2.pdf", width = 14, height = 7)

#---------------------------------
# Dropping inattentive respondents
# 1) Failed manipulation check
# 2) Took fewer than 3 minutes to complete survey
tu.ia <- tu %>%
  filter(pass_manip == 1, 
         minutes > 3)

# All treatments and DVs
dif.of.means.df <- bind_rows(# apply
  dif.of.means.fun(tu.ia$apply[tu.ia$manipulation == "30 Minute Average"],
                   tu.ia$apply[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$apply[tu.ia$manipulation == "2 Hour Average"],
                   tu.ia$apply[tu.ia$manipulation == "Baseline"]),
  # helpful
  dif.of.means.fun(tu.ia$helpful[tu.ia$manipulation == "30 Minute Average"],
                   tu.ia$helpful[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$helpful[tu.ia$manipulation == "2 Hour Average"],
                   tu.ia$helpful[tu.ia$manipulation == "Baseline"]),
  # worth
  dif.of.means.fun(tu.ia$worth[tu.ia$manipulation == "30 Minute Average"],
                   tu.ia$worth[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$worth[tu.ia$manipulation == "2 Hour Average"],
                   tu.ia$worth[tu.ia$manipulation == "Baseline"]),
  # learn
  dif.of.means.fun(tu.ia$learn[tu.ia$manipulation == "30 Minute Average"],
                   tu.ia$learn[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$learn[tu.ia$manipulation == "2 Hour Average"],
                   tu.ia$learn[tu.ia$manipulation == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 2), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:2, 
                                  labels = c("30 Minute Average - \nBaseline",
                                             "2 Hour Average - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

# Save as table
tab.elig <- table(tu.ia$manipulation, tu.ia$eligible)
tab.support <- table(tu.ia$manipulation, tu.ia$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                             "2 Hour Average - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 2), rep("Not Enough Support", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave("composite_s2_attentive.pdf", width = 12, height = 7)

#-----------------------------------
# Prior contact with social welfare
# Documentation 
tu.personal <- tu %>%
  filter(str_detect(personal, "Medicaid|Unemployment|SNAP|Medicare|TANF"))
nrow(tu.personal)/nrow(tu)

# All treatments and DVs
dif.of.means.df <- bind_rows(# apply
  dif.of.means.fun(tu.personal$apply[tu.personal$manipulation == "30 Minute Average"],
                   tu.personal$apply[tu.personal$manipulation == "Baseline"]),
  dif.of.means.fun(tu.personal$apply[tu.personal$manipulation == "2 Hour Average"],
                   tu.personal$apply[tu.personal$manipulation == "Baseline"]),
  # helpful
  dif.of.means.fun(tu.personal$helpful[tu.personal$manipulation == "30 Minute Average"],
                   tu.personal$helpful[tu.personal$manipulation == "Baseline"]),
  dif.of.means.fun(tu.personal$helpful[tu.personal$manipulation == "2 Hour Average"],
                   tu.personal$helpful[tu.personal$manipulation == "Baseline"]),
  # worth
  dif.of.means.fun(tu.personal$worth[tu.personal$manipulation == "30 Minute Average"],
                   tu.personal$worth[tu.personal$manipulation == "Baseline"]),
  dif.of.means.fun(tu.personal$worth[tu.personal$manipulation == "2 Hour Average"],
                   tu.personal$worth[tu.personal$manipulation == "Baseline"]),
  # learn
  dif.of.means.fun(tu.personal$learn[tu.personal$manipulation == "30 Minute Average"],
                   tu.personal$learn[tu.personal$manipulation == "Baseline"]),
  dif.of.means.fun(tu.personal$learn[tu.personal$manipulation == "2 Hour Average"],
                   tu.personal$learn[tu.personal$manipulation == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 2), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:2, 
                                  labels = c("30 Minute Average - \nBaseline",
                                             "2 Hour Average - \nBaseline"))
# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

# Save as table
tab.elig <- table(tu.personal$manipulation, tu.personal$eligible)
tab.support <- table(tu.personal$manipulation, tu.personal$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                             "2 Hour Average - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 2), rep("Not Enough Support", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave("composite_s2_personal.pdf", width = 12, height = 7)

# Time estimation
# Just in estimating time category
tu.time <- tu.personal

# Calculate the median and mean for each group
stats <- tu.time %>%
  filter(est_time < 11) %>%
  group_by(manipulation) %>%
  summarize(
    median_est_time = median(est_time, na.rm = TRUE),
    mean_est_time = mean(est_time, na.rm = TRUE),
    sd_est_time = sd(est_time, na.rm   = TRUE)
  )

# Plot the histogram with a vertical line for the median and text labels for both median and mean
ggplot(filter(tu.time, est_time < 11), aes(est_time)) +
  geom_histogram(binwidth = 1, alpha = 0.5) +
  facet_wrap(~fct_rev(manipulation)) +
  geom_vline(data = stats, aes(xintercept = median_est_time), color = "black", linetype = "dashed", size = 1) +
  geom_text(
    data = stats, 
    aes(x = median_est_time, y = Inf, label = paste("Median:", round(median_est_time, 1))), 
    vjust = 5, hjust = -.5, color = "black", size = 4
  ) +
  geom_text(
    data = stats, 
    aes(x = median_est_time, y = Inf, label = paste("Mean:", round(mean_est_time, 1))), 
    vjust = 7, hjust = -.5, color = "darkblue", size = 4
  ) +
  theme_bw(base_size = 14) +
  labs(y = "", x = "Estimated Time")
# ggsave("time_hist_prior_s2.pdf", width = 9, height = 4)

# Documentation 

# Documentation variables
tu.doc <- tu.personal %>%
  mutate(equal = ifelse(documentation == "About equal", 1, 0),
         notsure = ifelse(documentation == "I'm not sure", 1, 0),
         identity = ifelse(documentation == "Proof of identity", 1, 0),
         income = ifelse(documentation == "Proof of income", 1, 0),
         benefits = ifelse(documentation == "Proof of other benefits", 1, 0),
         residency = ifelse(documentation == "Proof of residency", 1, 0),
         household = ifelse(documentation == "Proof of household resources", 1, 0))
tu.doc.1 <- tu.doc %>% filter(manipulation %in% c("Baseline", "30 Minute Average"))
tu.doc.2 <- tu.doc %>% filter(manipulation %in% c("Baseline", "2 Hour Average"))

# Difference of proportion
# For each treatment group and outcome
diff.prop.doc.1 <- bind_rows(prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$equal)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$notsure)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$identity)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$income)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$benefits)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$residency)),
                             prop.test.values(table(tu.doc.1$manipulation, tu.doc.1$household)))
diff.prop.doc.2 <- bind_rows(prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$equal)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$notsure)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$identity)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$income)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$benefits)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$residency)),
                             prop.test.values(table(tu.doc.2$manipulation, tu.doc.2$household)))

# Add in comparison text
diff.prop.doc.1$compare <- "30 Minute Average - \nBaseline"
diff.prop.doc.2$compare <- "2 Hour Average - \nBaseline"
diff.prop.doc.1$condition <- c("About equal", "I'm not sure", "Proof of identity", 
                               "Proof of income", "Proof of other benefits", 
                               "Proof of residency", "Proof of household resources")
diff.prop.doc.2$condition <- c("About equal", "I'm not sure", "Proof of identity", 
                               "Proof of income", "Proof of other benefits", 
                               "Proof of residency", "Proof of household resources")

# Combine and graph
diff.prop.documentation <- rbind(diff.prop.doc.1,
                                 diff.prop.doc.2)
# Change factor levels
diff.prop.documentation$compare <- fct_relevel(diff.prop.documentation$compare,
                                               "30 Minute Average - \nBaseline",
                                               "2 Hour Average - \nBaseline")
diff.prop.documentation$condition <- fct_relevel(diff.prop.documentation$condition,
                                                 "I'm not sure", "About equal",
                                                 "Proof of residency", "Proof of identity",
                                                 "Proof of income", "Proof of other benefits", 
                                                 "Proof of household resources")

ggplot(diff.prop.documentation, aes(x = fct_rev(condition), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~compare) +
  theme_bw(base_size = 14) +
  coord_flip() +
  labs(y = "Difference in Proportion", x = "")
# ggsave("documentation_diff_prior_s2.pdf", width = 10, height = 5)

#--------------------------------
# Not eligible by reported income
# hhi: under 14 (14 = $75,000 to $79,999)
# hhi: under 20 (20 = $125,000 to $149,999)
nrow(filter(tu, hhi < 20))/nrow(tu)
nrow(filter(tu, hhi < 14))/nrow(tu)

# Assuming household income under $125,000
tu.125 <- tu %>%
  filter(hhi < 20)

# All treatments and DVs
dif.of.means.df <- bind_rows(# apply
  dif.of.means.fun(tu.125$apply[tu.125$manipulation == "30 Minute Average"],
                   tu.125$apply[tu.125$manipulation == "Baseline"]),
  dif.of.means.fun(tu.125$apply[tu.125$manipulation == "2 Hour Average"],
                   tu.125$apply[tu.125$manipulation == "Baseline"]),
  # helpful
  dif.of.means.fun(tu.125$helpful[tu.125$manipulation == "30 Minute Average"],
                   tu.125$helpful[tu.125$manipulation == "Baseline"]),
  dif.of.means.fun(tu.125$helpful[tu.125$manipulation == "2 Hour Average"],
                   tu.125$helpful[tu.125$manipulation == "Baseline"]),
  # worth
  dif.of.means.fun(tu.125$worth[tu.125$manipulation == "30 Minute Average"],
                   tu.125$worth[tu.125$manipulation == "Baseline"]),
  dif.of.means.fun(tu.125$worth[tu.125$manipulation == "2 Hour Average"],
                   tu.125$worth[tu.125$manipulation == "Baseline"]),
  # learn
  dif.of.means.fun(tu.125$learn[tu.125$manipulation == "30 Minute Average"],
                   tu.125$learn[tu.125$manipulation == "Baseline"]),
  dif.of.means.fun(tu.125$learn[tu.125$manipulation == "2 Hour Average"],
                   tu.125$learn[tu.125$manipulation == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 2), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:2, 
                                  labels = c("30 Minute Average - \nBaseline",
                                             "2 Hour Average - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

# Save as table
tab.elig <- table(tu.125$manipulation, tu.125$eligible)
tab.support <- table(tu.125$manipulation, tu.125$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                             "2 Hour Average - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 2), rep("Not Enough Support", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave("composite_s2_125.pdf", width = 16, height = 8)

# Assuming income under $75,000
tu.75 <- tu %>%
  filter(hhi < 14)

# All treatments and DVs
dif.of.means.df <- bind_rows(# apply
  dif.of.means.fun(tu.75$apply[tu.75$manipulation == "30 Minute Average"],
                   tu.75$apply[tu.75$manipulation == "Baseline"]),
  dif.of.means.fun(tu.75$apply[tu.75$manipulation == "2 Hour Average"],
                   tu.75$apply[tu.75$manipulation == "Baseline"]),
  # helpful
  dif.of.means.fun(tu.75$helpful[tu.75$manipulation == "30 Minute Average"],
                   tu.75$helpful[tu.75$manipulation == "Baseline"]),
  dif.of.means.fun(tu.75$helpful[tu.75$manipulation == "2 Hour Average"],
                   tu.75$helpful[tu.75$manipulation == "Baseline"]),
  # worth
  dif.of.means.fun(tu.75$worth[tu.75$manipulation == "30 Minute Average"],
                   tu.75$worth[tu.75$manipulation == "Baseline"]),
  dif.of.means.fun(tu.75$worth[tu.75$manipulation == "2 Hour Average"],
                   tu.75$worth[tu.75$manipulation == "Baseline"]),
  # learn
  dif.of.means.fun(tu.75$learn[tu.75$manipulation == "30 Minute Average"],
                   tu.75$learn[tu.75$manipulation == "Baseline"]),
  dif.of.means.fun(tu.75$learn[tu.75$manipulation == "2 Hour Average"],
                   tu.75$learn[tu.75$manipulation == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 2), rep(2, 2), rep(3, 2), rep(4, 2))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 2), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:2, 
                                  labels = c("30 Minute Average - \nBaseline",
                                             "2 Hour Average - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

# Save as table
tab.elig <- table(tu.75$manipulation, tu.75$eligible)
tab.support <- table(tu.75$manipulation, tu.75$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("30 Minute Average. - \nBaseline",
                             "2 Hour Average - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "30 Minute Average. - \nBaseline",
                                 "2 Hour Average - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 2), rep("Not Enough Support", 2))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
 geom_errorbar(width = 0, linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave("composite_s2_75.pdf", width = 16, height = 8)

#-------------------------------------------------------------------------------
# Balance checks

tu.balance <- tu %>%
  mutate(
    baseline = ifelse(manipulation == "Baseline", 1, 0),
    av30 = ifelse(manipulation == "30 Minute Average", 1, 0),
    av2 = ifelse(manipulation == "2 Hour Average", 1, 0)
  )

reg.balance1 = lm(baseline ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(urbanrural) + factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)
reg.balance2 = lm(av30 ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(urbanrural) + factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)
reg.balance3 = lm(av2 ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(urbanrural) + factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)

texreg(list(reg.balance1, reg.balance2, reg.balance3))

#-------------------------------------------------------------------------------
# Difference in time

tu.diff.time <- tu.balance %>%
  filter(baseline == 0, 
         est_time <= 10,
         !is.na(est_time)) %>%
  mutate(time_diff = case_when(av30 == 1 ~ est_time - 0.5,
                               av2 == 1 ~ est_time -2)) %>%
  mutate(prior_exp = ifelse(str_detect(personal, "Medicaid|Unemployment|SNAP|Medicare|TANF"), 1, 0))

# Eligible
m1 <- lm(eligible ~ av2 + time_diff, data = tu.diff.time); summary(m1)

# Apply
m2 <- lm(apply ~ av2 + time_diff, data = tu.diff.time); summary(m2)

# Helpful
m3 <- lm(helpful ~ av2 + time_diff, data = tu.diff.time); summary(m3)

# Worth
m4 <- lm(worth ~ av2 + time_diff, data = tu.diff.time); summary(m4)

# Learn
m5 <- lm(learn ~ av2 + time_diff, data = tu.diff.time); summary(m5)

# Not Enough
m6 <- lm(not_enough ~ av2 + time_diff, data = tu.diff.time); summary(m6)

texreg(list(m1, m2, m3, m4, m5, m6), stars = c(0.05, 0.1))

#----------------------------
# Dichotomous DVs

# Apply
m2 <- lm(apply_di ~ av2 + time_diff, data = tu.diff.time); summary(m2)

# Helpful
m3 <- lm(helpful_di ~ av2 + time_diff, data = tu.diff.time); summary(m3)

# Worth
m4 <- lm(worth_di ~ av2 + time_diff, data = tu.diff.time); summary(m4)

# Learn
m5 <- lm(learn_di ~ av2 + time_diff, data = tu.diff.time); summary(m5)


texreg(list(m2, m3, m4, m5), stars = c(0.05, 0.1))

#-----------------------------
# Moderated by prior experience

# Eligible
m1 <- lm(eligible ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m1)

# Apply
m2 <- lm(apply ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m2)

# Helpful
m3 <- lm(helpful ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m3)

# Worth
m4 <- lm(worth ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m4)

# Learn
m5 <- lm(learn ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m5)

# Not Enough
m6 <- lm(not_enough ~ av2 + time_diff*prior_exp, data = tu.diff.time); summary(m6)

texreg(list(m1, m2, m3, m4, m5, m6), stars = c(0.05, 0.1))

#-----------------------------
# Difference in time, everyone

tu.diff.time <- tu.balance %>%
  filter(baseline == 0, 
         !is.na(est_time)) %>%
  mutate(time_diff = case_when(av30 == 1 ~ est_time - 0.5,
                               av2 == 1 ~ est_time -2)) %>%
  mutate(prior_exp = ifelse(str_detect(personal, "Medicaid|Unemployment|SNAP|Medicare|TANF"), 1, 0))
summary(tu.diff.time$time_diff)

# Eligible
m1 <- lm(eligible ~ av2 + time_diff, data = tu.diff.time); summary(m1)

# Apply
m2 <- lm(apply ~ av2 + time_diff, data = tu.diff.time); summary(m2)

# Helpful
m3 <- lm(helpful ~ av2 + time_diff, data = tu.diff.time); summary(m3)

# Worth
m4 <- lm(worth ~ av2 + time_diff, data = tu.diff.time); summary(m4)

# Learn
m5 <- lm(learn ~ av2 + time_diff, data = tu.diff.time); summary(m5)

# Not Enough
m6 <- lm(not_enough ~ av2 + time_diff, data = tu.diff.time); summary(m6)

texreg(list(m1, m2, m3, m4, m5, m6), stars = c(0.05, 0.1))

#-------------------------------------------------------------------------------
# ANOVA and Regression

# DVs
dvs <- c("eligible", "apply", "helpful", "worth", "learn", "not_enough")

# Run ANOVAs for each DV and store results in a list
anova_results <- lapply(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ manipulation"))
  aov(formula, data = tu)
})

lapply(anova_results, summary)

# Run regressions for each dependent variable and store results in a list
regression_results <- lapply(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ av30 + av2 + 
                    factor(female) + factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(urbanrural) + factor(region) +
                    as.numeric(hhi) + as.numeric(education)"))
  lm(formula, data = tu.balance)
})

r.results <- lapply(regression_results, summary)
texreg(r.results)

#-------------------------------------------------------------------------------
# Summary statistics
tu.balance %>%
  select(baseline, av30, av2, 
         eligible, apply, helpful, worth, learn, not_enough,
         female, race, age, party, urbanrural, region, hhi, education, 
         pass_manip, minutes) %>%
  stargazer::stargazer(type = "latex", summary = T, digits = 2)
